This document summarizes Rick Gilmore’s analysis of participant sorting data using graph and network analysis tools.
The Jaccard index data are found in analysis/data/jaccard.csv.
jaccard_raw <- readr::read_csv("analysis/data/jaccard.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Exemplar.Row = col_double(),
## Exemplar.Col = col_double(),
## Jaccard = col_double(),
## Group = col_character()
## )
str(jaccard_raw)
## spec_tbl_df [950 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Exemplar.Row: num [1:950] 1 1 1 1 1 1 1 1 1 1 ...
## $ Exemplar.Col: num [1:950] 2 2 2 2 2 3 3 3 3 3 ...
## $ Jaccard : num [1:950] 0.0476 0.1186 0.1228 0.2 0.2692 ...
## $ Group : chr [1:950] "P31M" "P3M1" "P6M" "P6" ...
## - attr(*, "spec")=
## .. cols(
## .. Exemplar.Row = col_double(),
## .. Exemplar.Col = col_double(),
## .. Jaccard = col_double(),
## .. Group = col_character()
## .. )
It’s probably wise to reorder the data frame by wallpaper group, Jaccard index, and exemplar index.
jaccard <- jaccard_raw %>%
dplyr::arrange(., Group, Exemplar.Row, desc(Jaccard))
Let’s add a Jaccard mean and median by Exemplar.Row.
jaccard_aug <- jaccard %>%
dplyr::group_by(., Group, Exemplar.Row) %>%
dplyr::mutate(.,
j_mean = mean(Jaccard),
j_med = median(Jaccard),
j_max = max(Jaccard),
j_min = min(Jaccard)
)
p1 <- jaccard %>%
dplyr::filter(., Group == "P1")
p1_edges <- tibble(from = p1$Exemplar.Row,
to = p1$Exemplar.Col,
weight = p1$Jaccard)
p1_nodes <- tibble(id = 1:20)
p1_network <- network::network(p1_edges, vertex.attr = p1_nodes,
matrix.type = "edgelist", ignore.eval = FALSE,
directed = FALSE)
plot(p1_network, vertex.cex = 3, mode='circle')
Let’s pick the top ten strongest connections.
p1_tidy <- tidygraph::tbl_graph(nodes = p1_nodes, edges = p1_edges,
directed = FALSE)
ggraph::ggraph(p1_tidy) + geom_edge_link() + geom_node_point() + theme_graph()
## Using `stress` as default layout
ggraph(p1_tidy, layout = "graphopt") +
geom_node_point() +
geom_edge_link(aes(width = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id), repel = TRUE) +
labs(edge_width = "Jaccard") +
theme_graph()
ggraph(p1_tidy, layout = "linear") +
geom_edge_arc(aes(width = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
Let’s pick the top two exemplars to plot.
p1_e8 <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 8)
ggraph(p1_e8, layout = "linear") +
geom_edge_arc(aes(width = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
p1_e10 <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 10 | to == 10)
ggraph(p1_e10, layout = "linear") +
geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
p1_e10 <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 10 | to == 10)
ggraph(p1_e10, layout = "graphopt") +
geom_edge_link(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
p1_selected <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 8 | to == 8)
ggraph(p1_selected, layout = "graphopt") +
geom_edge_link(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
p1_selected <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 8 | to == 8)
ggraph(p1_selected, layout = "linear") +
geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
p1_selected <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 10 | to == 10)
ggraph(p1_selected, layout = "linear") +
geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
jaccard_aug %>%
dplyr::filter(., Group == "P1") %>%
dplyr::arrange(., desc(j_mean))
## # A tibble: 190 x 8
## # Groups: Group, Exemplar.Row [19]
## Exemplar.Row Exemplar.Col Jaccard Group j_mean j_med j_max j_min
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 19 20 0.32 P1 0.32 0.32 0.32 0.32
## 2 16 20 0.375 P1 0.244 0.26 0.375 0.0820
## 3 16 17 0.32 P1 0.244 0.26 0.375 0.0820
## 4 16 19 0.2 P1 0.244 0.26 0.375 0.0820
## 5 16 18 0.0820 P1 0.244 0.26 0.375 0.0820
## 6 10 16 0.404 P1 0.237 0.222 0.404 0.0820
## 7 10 15 0.347 P1 0.237 0.222 0.404 0.0820
## 8 10 20 0.347 P1 0.237 0.222 0.404 0.0820
## 9 10 19 0.269 P1 0.237 0.222 0.404 0.0820
## 10 10 12 0.222 P1 0.237 0.222 0.404 0.0820
## # … with 180 more rows
It looks like exemplars 19 and 16 are are among the highest.
jaccard_aug %>%
dplyr::filter(., Group == "P1") %>%
dplyr::arrange(., j_mean)
## # A tibble: 190 x 8
## # Groups: Group, Exemplar.Row [19]
## Exemplar.Row Exemplar.Col Jaccard Group j_mean j_med j_max j_min
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 18 19 0.1 P1 0.1 0.1 0.1 0.1
## 2 18 20 0.1 P1 0.1 0.1 0.1 0.1
## 3 11 13 0.226 P1 0.168 0.182 0.226 0.0833
## 4 11 17 0.226 P1 0.168 0.182 0.226 0.0833
## 5 11 14 0.204 P1 0.168 0.182 0.226 0.0833
## 6 11 15 0.182 P1 0.168 0.182 0.226 0.0833
## 7 11 18 0.182 P1 0.168 0.182 0.226 0.0833
## 8 11 20 0.182 P1 0.168 0.182 0.226 0.0833
## 9 11 16 0.121 P1 0.168 0.182 0.226 0.0833
## 10 11 12 0.102 P1 0.168 0.182 0.226 0.0833
## # … with 180 more rows
18 and 11 among the lowest
p1_selected <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 18 | to == 18)
ggraph(p1_selected, layout = "linear") +
geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
p1_selected <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 19 | to == 19)
g <- ggraph(p1_selected, layout = "linear") +
geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.1, 4), limits = c(0, .6)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
g
p1_selected <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 19 | to == 19)
p1_selected <- p1_selected %>%
dplyr::mutate(weight = cut(weight, c(0, .1, .2, .3, .4, .5, .6)))
g <- ggraph(p1_selected, layout = "linear", circular = TRUE) +
geom_edge_arc(aes(color = factor(weight))) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
g
plot_jaccard_vals <- function(df, exemplar_id, group,
j_stat_type = "mean",
j_stat_val = NA) {
df <- df %>%
activate(edges) %>%
dplyr::filter(., from == exemplar_id | to == exemplar_id) %>%
dplyr::mutate(weight = cut(weight, c(0, .2, .4, .6, .8),
labels = c("<.2", ".2-.4", ".4-.6", ">.6")))
f_stat <- format(j_stat_val, digits = 2, nsmall = 2)
ggraph(df, layout = "linear", circular = TRUE) +
geom_edge_arc(aes(color = weight)) +
geom_node_text(aes(label = id)) +
ggtitle(paste0(group, " | # ", exemplar_id, " | ", j_stat_type, " Jaccard ", f_stat)) +
theme_graph() +
coord_fixed()
}
plot_jaccard_vals(p1_tidy, 11, "P1")
plot_jaccard_vals_2 <- function(df, exemplar_id, group,
j_stat_type = "mean",
j_stat_val = NA) {
df <- df %>%
activate(edges) %>%
dplyr::filter(., from == exemplar_id | to == exemplar_id)
f_stat <- format(j_stat_val, digits = 2, nsmall = 2)
ggraph(df, layout = "linear", circular = TRUE) +
geom_edge_arc(aes(color = weight)) +
geom_node_text(aes(label = id)) +
ggtitle(paste0(group, " | # ", exemplar_id, " | ", j_stat_type, " Jaccard ", f_stat)) +
theme_graph() +
coord_fixed()
}
plot_jaccard_vals_2(p1_tidy, 11, "P1")
plot_jaccard_vals_3 <- function(df, exemplar_id, group,
j_stat_type = "mean",
j_stat_val = NA) {
df <- df %>%
activate(edges) %>%
dplyr::filter(., from == exemplar_id | to == exemplar_id) %>%
dplyr::mutate(weight = cut(weight, c(0, .2, .4, .6, .8),
labels = c("<.2", ".2-.4", ".4-.6", ">.6")))
f_stat <- format(j_stat_val, digits = 2, nsmall = 2)
ggraph(df, layout = "linear", circular = TRUE) +
geom_edge_arc(aes(linetype = factor(weight),
color = factor(weight))) +
geom_node_text(aes(label = id)) +
ggtitle(paste0(group, " | # ", exemplar_id, " | ", j_stat_type, " Jaccard ", f_stat)) +
theme_graph() +
coord_fixed()
}
plot_jaccard_vals_3(p1_tidy, 11, "P1")
wp_graph <- function(df, group) {
out_df <- df %>%
dplyr::filter(., Group == group)
df_edges <- tibble(from = out_df$Exemplar.Row,
to = out_df$Exemplar.Col,
weight = out_df$Jaccard)
df_nodes <- tibble(id = 1:20)
tidygraph::tbl_graph(nodes = df_nodes,
edges = df_edges,
directed = FALSE)
}
jaccard_stats <- function(jaccard) {
jaccard %>%
dplyr::mutate(., exemplar_pair = paste0(Exemplar.Row, "-", Exemplar.Col)) %>%
dplyr::group_by(., Group) %>%
dplyr::summarise(
.,
Jaccard_mean = mean(Jaccard),
Jaccard_med = median(Jaccard),
Jaccard_max = max(Jaccard),
Jaccard_min = min(Jaccard),
Exemplar.Row = Exemplar.Row,
Jaccard = Jaccard,
exemplar_pair = exemplar_pair
)
}
Test jaccard_stats().
graph <- wp_graph(jaccard, "P31M")
j_stats <- jaccard_stats(jaccard_raw)
## `summarise()` has grouped output by 'Group'. You can override using the `.groups` argument.
Define some helper functions to pick extremes of mean(Jaccard), max(Jaccard), and min(Jaccard).
These no longer seem quite right, so I am setting eval=FALSE on this section of code.
pick_extreme_mean_exemplars <- function(j_stats, group, hi_lo = "hi", n_exemplars = 1) {
this_group <- j_stats %>%
dplyr::filter(., Group == group)
if (hi_lo == "hi") {
this_group <- this_group %>%
dplyr::arrange(., desc(Jaccard_mean))
} else {
this_group <- this_group %>%
dplyr::arrange(., Jaccard_mean)
}
this_group$Exemplar.Row[1:n_exemplars]
}
pick_extreme_max_exemplars <- function(j_stats, group, hi_lo = "hi", n_exemplars = 1) {
this_group <- j_stats %>%
dplyr::filter(., Group == group)
if (hi_lo == "hi") {
this_group <- this_group %>%
dplyr::arrange(., desc(Jaccard_max))
} else {
this_group <- this_group %>%
dplyr::arrange(., Jaccard_max)
}
this_group$Exemplar.Row[1:n_exemplars]
}
pick_extreme_min_exemplars <- function(j_stats, group, hi_lo = "hi", n_exemplars = 1) {
this_group <- j_stats %>%
dplyr::filter(., Group == group)
if (hi_lo == "hi") {
this_group <- this_group %>%
dplyr::arrange(., desc(Jaccard_min))
} else {
this_group <- this_group %>%
dplyr::arrange(., Jaccard_min)
}
this_group$Exemplar.Row[1:n_exemplars]
}
jaccard <- jaccard_raw %>%
dplyr::arrange(., Group, Exemplar.Row, desc(Jaccard))
j_stats <- jaccard_stats(jaccard_raw)
## `summarise()` has grouped output by 'Group'. You can override using the `.groups` argument.
this_group = "P31M"
this_graph <- wp_graph(jaccard, this_group)
exemplars_w_max_jaccard <- j_stats %>%
dplyr::filter(., Group == this_group,
Jaccard_max == Jaccard)
exemplars_w_min_jaccard <- j_stats %>%
dplyr::filter(., Group == this_group,
Jaccard_min == Jaccard)
plot_jaccard_vals_3(this_graph,
as.character(exemplars_w_max_jaccard[1, 'Exemplar.Row']),
"P31M",
"max",
as.numeric(exemplars_w_max_jaccard[1, 'Jaccard']))
plot_jaccard_vals_3(this_graph,
as.character(exemplars_w_min_jaccard[1, 'Exemplar.Row']),
"P31M",
"min",
as.numeric(exemplars_w_min_jaccard[1, 'Jaccard']))
# Jaccard heatmaps
Let’s try to visualize the Jaccard indices as a heatmap.
heatmap() requires a matrix, so we have to convert jaccard to a matrix.
Let’s pick one of the wp groups to make our lives easier.
p31m <- jaccard %>%
dplyr::filter(., Group == "P31M")
Let’s see if we can assign values using Exemplar.Row and Exemplar.Col.
p31m_matrix <- matrix(nrow = 20, ncol = 20)
for (r in 1:190) {
p31m_matrix[p31m$Exemplar.Row[r], p31m$Exemplar.Col[r]] <- p31m$Jaccard[r]
}
# # Add identity values
# for (r in 1:20) {
# p31m_matrix[r, r] <- 1
# }
p31m_matrix
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] NA 0.04761905 0.137931 0.13793103 0.17857143 0.15789474 0.10000000
## [2,] NA NA 0.031250 0.24528302 0.13793103 0.06451613 0.65000000
## [3,] NA NA NA 0.06451613 0.24528302 0.08196721 0.06451613
## [4,] NA NA NA NA 0.06451613 0.13793103 0.15789474
## [5,] NA NA NA NA NA 0.11864407 0.20000000
## [6,] NA NA NA NA NA NA 0.04761905
## [7,] NA NA NA NA NA NA NA
## [8,] NA NA NA NA NA NA NA
## [9,] NA NA NA NA NA NA NA
## [10,] NA NA NA NA NA NA NA
## [11,] NA NA NA NA NA NA NA
## [12,] NA NA NA NA NA NA NA
## [13,] NA NA NA NA NA NA NA
## [14,] NA NA NA NA NA NA NA
## [15,] NA NA NA NA NA NA NA
## [16,] NA NA NA NA NA NA NA
## [17,] NA NA NA NA NA NA NA
## [18,] NA NA NA NA NA NA NA
## [19,] NA NA NA NA NA NA NA
## [20,] NA NA NA NA NA NA NA
## [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0.24528302 0.22222222 0.11864407 0.17857143 0.1379310 0.13793103
## [2,] 0.03125000 0.24528302 0.08196721 0.03125000 0.1578947 0.06451613
## [3,] 0.20000000 0.11864407 0.24528302 0.15789474 0.1186441 0.17857143
## [4,] 0.11864407 0.46666667 0.17857143 0.17857143 0.1186441 0.15789474
## [5,] 0.15789474 0.08196721 0.11864407 0.11864407 0.2222222 0.06451613
## [6,] 0.20000000 0.04761905 0.13793103 0.24528302 0.1785714 0.22222222
## [7,] 0.08196721 0.15789474 0.10000000 0.01538462 0.1785714 0.10000000
## [8,] NA 0.06451613 0.15789474 0.10000000 0.2452830 0.17857143
## [9,] NA NA 0.20000000 0.13793103 0.1000000 0.10000000
## [10,] NA NA NA 0.24528302 0.1000000 0.29411765
## [11,] NA NA NA NA 0.1000000 0.17857143
## [12,] NA NA NA NA NA 0.11864407
## [13,] NA NA NA NA NA NA
## [14,] NA NA NA NA NA NA
## [15,] NA NA NA NA NA NA
## [16,] NA NA NA NA NA NA
## [17,] NA NA NA NA NA NA
## [18,] NA NA NA NA NA NA
## [19,] NA NA NA NA NA NA
## [20,] NA NA NA NA NA NA
## [,14] [,15] [,16] [,17] [,18] [,19]
## [1,] 0.06451613 0.22222222 0.11864407 0.20000000 0.08196721 0.20000000
## [2,] 0.43478261 0.03125000 0.08196721 0.22222222 0.10000000 0.06451613
## [3,] 0.04761905 0.15789474 0.17857143 0.11864407 0.17857143 0.22222222
## [4,] 0.29411765 0.11864407 0.13793103 0.11864407 0.20000000 0.11864407
## [5,] 0.06451613 0.11864407 0.10000000 0.10000000 0.24528302 0.04761905
## [6,] 0.10000000 0.26923077 0.34693878 0.17857143 0.20000000 0.32000000
## [7,] 0.34693878 0.04761905 0.06451613 0.17857143 0.10000000 0.08196721
## [8,] 0.06451613 0.34693878 0.20000000 0.08196721 0.15789474 0.26923077
## [9,] 0.34693878 0.08196721 0.10000000 0.15789474 0.15789474 0.15789474
## [10,] 0.10000000 0.24528302 0.26923077 0.15789474 0.11864407 0.15789474
## [11,] 0.10000000 0.26923077 0.29411765 0.15789474 0.11864407 0.13793103
## [12,] 0.06451613 0.22222222 0.08196721 0.06451613 0.13793103 0.10000000
## [13,] 0.11864407 0.13793103 0.26923077 0.17857143 0.29411765 0.26923077
## [14,] NA 0.08196721 0.11864407 0.15789474 0.15789474 0.11864407
## [15,] NA NA 0.15789474 0.11864407 0.11864407 0.29411765
## [16,] NA NA NA 0.26923077 0.20000000 0.22222222
## [17,] NA NA NA NA 0.08196721 0.15789474
## [18,] NA NA NA NA NA 0.15789474
## [19,] NA NA NA NA NA NA
## [20,] NA NA NA NA NA NA
## [,20]
## [1,] 0.04761905
## [2,] 0.43478261
## [3,] 0.04761905
## [4,] 0.24528302
## [5,] 0.06451613
## [6,] 0.08196721
## [7,] 0.34693878
## [8,] 0.04761905
## [9,] 0.26923077
## [10,] 0.13793103
## [11,] 0.04761905
## [12,] 0.11864407
## [13,] 0.13793103
## [14,] 0.50000000
## [15,] 0.06451613
## [16,] 0.13793103
## [17,] 0.20000000
## [18,] 0.15789474
## [19,] 0.13793103
## [20,] NA
Ok, lets’ try heatmap.
heatmap(p31m_matrix, Rowv = NA, Colv = NA, symm = TRUE, col = cm.colors(256))
Create a function to do this for each WP group.
plot_wp_heatmap <- function(df, group) {
this_df <- df %>%
dplyr::filter(., Group %in% group)
this_matrix <- matrix(nrow = 20*length(group), ncol = 20)
for (r in 1:190) {
this_matrix[this_df$Exemplar.Row[r], this_df$Exemplar.Col[r]] <-
this_df$Jaccard[r]
}
heatmap(this_matrix,
Rowv = NA,
Colv = NA,
symm = TRUE,
main = group,
col= colorRampPalette(RColorBrewer::brewer.pal(3, "Oranges"))(3))
legend(x="bottomright",
legend=c("low", "mid", "high"),
fill=colorRampPalette(RColorBrewer::brewer.pal(3, "Oranges"))(3))
}
Let’s try it.
plot_wp_heatmap(jaccard, "P1")
plot_wp_heatmap(jaccard, "P31M")
plot_wp_heatmap(jaccard, "P3M1")
plot_wp_heatmap(jaccard, "P6")
plot_wp_heatmap(jaccard, "P6M")
Or, there’s a
tidyHeatmap package.
p31_heatmap <-
p31m %>%
tidyHeatmap::heatmap(Exemplar.Row, Exemplar.Col, Jaccard )
## tidyHeatmap says: (once per session) from release 1.2.3 the grouping labels have white background by default. To add color for one-ay grouping specify palette_grouping = list(c("red", "blue"))
p31_heatmap